# -*- coding: utf-8 -*-
"""Example of using Deep Isolation Forest for
outlier detection"""
# Author: Hongzuo Xu <hongzuoxu@126.com>
# License: BSD 2 clause

from __future__ import division
from __future__ import print_function

import os
import sys

# temporary solution for relative imports in case pyod is not installed
# if pyod is installed, no need to use the following line
sys.path.append(
	os.path.abspath(os.path.join(os.path.dirname("__file__"), '..')))

from pyod.models.dif import DIF
from pyod.utils.data import generate_data
from pyod.utils.data import evaluate_print

if __name__ == "__main__":
	contamination = 0.1  # percentage of outliers
	n_train = 1000  # number of training points
	n_test = 200  # number of testing points
	n_features = 30  # number of features

	# Generate sample data
	X_train, X_test, y_train, y_test = \
		generate_data(n_train=n_train,
					  n_test=n_test,
					  n_features=n_features,
					  contamination=contamination,
					  random_state=42)

	# train deep isolation forest detector
	clf_name = 'DIF'
	clf = DIF()
	clf.fit(X_train)

	# get the prediction labels and outlier scores of the training data
	y_train_pred = clf.labels_  # binary labels (0: inliers, 1: outliers)
	y_train_scores = clf.decision_scores_  # raw outlier scores

	# get the prediction on the test data
	y_test_pred = clf.predict(X_test)  # outlier labels (0 or 1)
	y_test_scores = clf.decision_function(X_test)  # outlier scores

	# evaluate and print the results
	print("\nOn Training Data:")
	evaluate_print(clf_name, y_train, y_train_scores)
	print("\nOn Test Data:")
	evaluate_print(clf_name, y_test, y_test_scores)
